# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/28 17:19
# @File: qu_4_spark_sql_api_csv_dataframe.py

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, IntegerType

if __name__ == '__main__':
    # Demo: read the same CSV file twice through the DataFrameReader API,
    # declaring the identical three-column schema in two different ways
    # (a DDL string, then a StructType), and print each resulting DataFrame.
    csv_path = "file:///tmp/pycharm_project_161/data/sql/people.csv"

    spark = SparkSession.builder.appName("a").master("local[*]").getOrCreate()
    try:
        # Schema form 1: DDL-formatted string.
        ddl_schema = "name String, age Int, job String"

        # Schema form 2: StructType built field by field; the third argument
        # of add() marks each field as nullable.
        struct_schema = (
            StructType()
            .add("name", StringType(), True)
            .add("age", IntegerType(), True)
            .add("job", StringType(), True)
        )

        # Both reads use the same reader options; only the schema form differs.
        for schema in (ddl_schema, struct_schema):
            df = (
                spark.read.format("csv")
                .option("header", True)
                .option("sep", ";")
                .option("encoding", "utf-8")
                .schema(schema)
                .load(csv_path)
            )
            df.show()
    finally:
        # Always stop the session so it is shut down even when a read or
        # show() raises; the original script never stopped it.
        spark.stop()